import os
import re
import sys
import time
import jieba
import random
import json
import platform
import requests
import pprint
import jieba.posseg as pseg

from pymongo import MongoClient
from selenium import webdriver
from bs4 import BeautifulSoup

# Handle to the local MongoDB server. connect=False is passed so the client
# is constructed without opening a connection right away.
client127 = MongoClient(host='127.0.0.1', port=27017, connect=False)

# Database and collection holding the itjuzi company documents that the
# script below iterates over and updates.
itjuzi_db127 = client127.get_database('itjuzi_db')
itjuzi_coll127 = itjuzi_db127.get_collection('itjuzi_coll')

# Prefix of the company-detail endpoint; the company id and the query string
# are appended to it.
COMPANY_API_URL = 'https://www.itjuzi.com/api/companies/'

# Seconds to wait on a single HTTP request before giving up, so that one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Pause (seconds) after every API call.
REQUEST_INTERVAL = 2

# Headers sent with every API call. Built once because they do not change
# between iterations. The Cookie value is a browser session; set the
# ITJUZI_COOKIE environment variable to supply a fresh one without editing
# this file (the literal below is only the fallback).
# NOTE(review): 'br' is advertised in Accept-Encoding; confirm the installed
# requests/urllib3 stack can decode brotli, otherwise JSON decoding fails
# (that failure is caught and reported in fetch_basic).
REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en,zh-CN;q=0.9,zh;q=0.8,en-US;q=0.7',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Host': 'www.itjuzi.com',
    'Cookie': os.environ.get(
        'ITJUZI_COOKIE',
        'acw_tc=781bad0615535783076525365e329c407b8a50cc3c383d3a0d9cbb819393f3; _ga=GA1.2.274236076.1553578308; _gid=GA1.2.1516241056.1553578308; gr_user_id=b7562835-49be-40b6-8ece-f2b92bb84010; juzi_user=707047; MEIQIA_VISIT_ID=1Iz7eX1wW5UFXrizDaPPgw5NpI2; MEIQIA_EXTRA_TRACK_ID=1Iz7eaKqeJV2FAUH6CdhgHCQink; gr_session_id_eee5a46c52000d401f969f4535bdaa78=2c40986c-9379-4820-b695-f963a4e29279; Hm_lvt_1c587ad486cdb6b962e94fc2002edf89=1553578372,1553669011,1553750627,1553820604; gr_session_id_eee5a46c52000d401f969f4535bdaa78_2c40986c-9379-4820-b695-f963a4e29279=true; juzi_token=Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwczpcL1wvd3d3Lml0anV6aS5jb21cL2FwaVwvdXNlcnNcL3VzZXJfaGVhZGVyX2luZm8iLCJpYXQiOjE1NTM2NzY3NjQsImV4cCI6MTU1MzgyNDIwMiwibmJmIjoxNTUzODIwNjAyLCJqdGkiOiJiajE4Zk9sdkVJOW1ISzR0Iiwic3ViIjo3MDcwNDcsInBydiI6IjIzYmQ1Yzg5NDlmNjAwYWRiMzllNzAxYzQwMDg3MmRiN2E1OTc2ZjcifQ.m4YSZknMWnwYVx6Q4dYNHx1MHz5abllqJ88q3q0ST08; vivi9dd=y; dkwlsn5=y; Hm_lpvt_1c587ad486cdb6b962e94fc2002edf89=1553820610; shsp32i=y',
    ),
    'Pragma': 'no-cache',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
}


def extract_basic(resp):
    """Return the ``basic`` dict from a decoded API response, or None.

    Args:
        resp: The JSON-decoded response body.

    Returns:
        ``resp['data']['basic']`` when ``resp['status'] == 'success'`` and
        that value is a dict; otherwise None. Never raises on malformed
        input.
    """
    if not isinstance(resp, dict) or resp.get('status') != 'success':
        return None
    data = resp.get('data')
    if not isinstance(data, dict):
        return None
    basic = data.get('basic')
    return basic if isinstance(basic, dict) else None


def fetch_basic(comp_id):
    """Request the basic profile of one company from the API.

    Args:
        comp_id: Company id as stored in the collection; converted with
            ``str`` so a numeric id does not break the URL concatenation.

    Returns:
        The ``basic`` dict on success, or None when the request fails, the
        body is not JSON, or the API does not report success.
    """
    url = COMPANY_API_URL + str(comp_id) + '?type=basic'
    try:
        r = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
        resp = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a non-JSON body: report it and let the
        # caller move on to the next company instead of aborting the run.
        print(exc)
        return None
    return extract_basic(resp)


if __name__ == '__main__':
    # A cursor opened with no_cursor_timeout=True is not reaped by the server
    # when idle, so it is closed explicitly once the loop ends or fails.
    cursor = itjuzi_coll127.find(no_cursor_timeout=True)
    try:
        for item in cursor:
            if 'basic' in item:
                # Already populated on an earlier run; '已抓取' is the
                # "already fetched" marker printed by the script.
                stored = item['basic']
                name = stored.get('com_registered_name') if isinstance(stored, dict) else None
                print(name, '已抓取')
                continue

            comp_id = item.get('comp_id')
            if comp_id is None:
                # Without an id there is nothing to request; no API call is
                # made, so no pause is needed either.
                print('error,please check!', item.get('_id'))
                continue

            basic = fetch_basic(comp_id)
            if basic is not None:
                itjuzi_coll127.update_one(
                    {'comp_id': comp_id},
                    {'$set': {'basic': basic}},
                )
                pprint.pprint(basic.get('com_registered_name'))
            else:
                print('error,please check!', comp_id)

            time.sleep(REQUEST_INTERVAL)
    finally:
        cursor.close()
